#setting-up the working directory
#use the setwd function to adjust the path, for example: 
#setwd('C:/taxbase')

#loading packages. For uninstalled package, use the function install.packages.
library(tidyverse)
library(rworldmap)
library(lme4)
library(stargazer)
library(DescTools)
library(ggpubr)
library(jtools)
library(mice)
library(mediation)
library(interactions)


###Start of main analysis####
#Loading Latinobaromtero 2020 survey 2020
load("Latinobarometro_2020_Eng_Rdata_v1_0.rdata")

#changing variables names and clearing data
lb2020 <- Latinobarometro_2020_Eng
lb2020$idenpa[lb2020$idenpa == 32] <- "Argentina"
lb2020$idenpa[lb2020$idenpa == 68] <- "Bolivia"
lb2020$idenpa[lb2020$idenpa == 76] <- "Brazil"
lb2020$idenpa[lb2020$idenpa == 152] <- "Chile"
lb2020$idenpa[lb2020$idenpa == 170] <- "Colombia"
lb2020$idenpa[lb2020$idenpa == 188] <- "Costa Rica"
lb2020$idenpa[lb2020$idenpa == 214] <- "Dominican Republic"
lb2020$idenpa[lb2020$idenpa == 218] <- "Ecuador"
lb2020$idenpa[lb2020$idenpa == 222] <- "El Salvador"
lb2020$idenpa[lb2020$idenpa == 320] <- "Guatemala"
lb2020$idenpa[lb2020$idenpa == 340] <- "Honduras"
lb2020$idenpa[lb2020$idenpa == 484] <- "Mexico"
lb2020$idenpa[lb2020$idenpa == 558] <- "Nicaragua"
lb2020$idenpa[lb2020$idenpa == 591] <- "Panama"
lb2020$idenpa[lb2020$idenpa == 600] <- "Paraguay"
lb2020$idenpa[lb2020$idenpa == 604] <- "Peru"
lb2020$idenpa[lb2020$idenpa == 858] <- "Uruguay"
lb2020$idenpa[lb2020$idenpa == 862] <- "Venezuela"
lb2020$sexo[lb2020$sexo == 1] <- 0
lb2020$sexo[lb2020$sexo == 2] <- 1

tax <-
  lb2020 %>% dplyr::select(
    numinves,
    idenpa,
    reg,
    ciudad,
    tamciud,
    edad,
    reedad,
    sexo,
    reeduc.1,
    s5npn,
    p13st.d,
    p13st.e,
    p13st.f,
    p20st.a,
    P11STGBS.A,
    p15n.j,
    p72npn,
    s8npn.b,
    s9npn,
    s1,
    p18st,
    p19st.a,
    p73npn.5,
    p73npn.4,
    p73npn.3,
    p73npn.2,
    p73npn.1,
    P75NPN_14,
    P75NPN_15,
    p42n,
    p43n,
    p45n,
    P16N.B_02
  )
colnames(tax) <-
  c(
    'year',
    'country',
    'countryregion',
    'city',
    'citysize',
    'age',
    'agegroup',
    'female',
    'educgroup',
    'subincome',
    'congpartrust',
    'govtrust',
    'juditrust',
    'supdemoc',
    'satdemoc',
    'trustpubhosp',
    'inqtolerance',
    'taxgroup',
    'taxprog',
    'socialclass',
    'polideo',
    'unfairness.dist',
    'q5per',
    'q4per',
    'q3per',
    'q2per',
    'q1per',
    'incomeinqworst',
    'richpoorworst',
    'fightcrime',
    'localauthority',
    'complaint',
    'meritocracy'
  )

tax <- tax %>% relocate(year, .after = country)
tax %>% summarize(n = n_distinct(country))
#adding region (central/south america) and grouping together
tax <- tax %>% dplyr::mutate(region = ifelse(country %in% c('Argentina','Brazil','Bolivia','Chile','Colombia','Ecuador','Paraguay','Peru','Uruguay','Venezuela'),'South America','Central America'))
tax %>% group_by(region) %>% summarize(n = n_distinct(country))

#recode meritocracy as the value '0' implies belief in meritocracy
table(tax$meritocracy)
tax <- tax %>% dplyr::mutate(meritocracy = ifelse(meritocracy == 0,1,
                                                  ifelse(meritocracy == 1,0,meritocracy)))
table(tax$meritocracy)
#assignment to be used for separate analysis to examine balance between main control variables (see Table A2)
tax.b <- tax 

#tax preferences - inferred by starting from which income group individuals should pay taxes 
#filtering out -5 (when respondents don't know or have no answer) 
table(tax$taxgroup)
tax <- tax %>% dplyr::filter(!(taxgroup == -5))
tax %>% group_by(taxgroup) %>% summarize(n = n())

#tax base preferences - stylised facts
tax <-
  tax %>% dplyr::mutate(
    masstax = ifelse(dplyr::between(taxgroup, 1, 5), 1, ifelse(taxgroup == 11, 1, 0)),
    classtax = ifelse(taxgroup == 10,1,0),
    taxthreshold = ifelse(taxgroup == 12,11,ifelse(taxgroup == 11,0,taxgroup))
  )
#option 1 - displaying mean value by country of from which income group should taxes should be paid; 
#using the taxthreshold variable
taxbase <-
  tax %>% group_by(country) %>% summarize(
    n = n(),
    taxthreshold = mean(taxthreshold)
  )
#visualising data
taxbase_map <-
  joinCountryData2Map(taxbase, joinCode = 'NAME', nameJoinColumn = 'country')
#add coordinates to include country names to the map
country_coord <- data.frame(coordinates(taxbase_map),stringsAsFactors = FALSE)
latam_countries <- c('Argentina',
                     'Brazil',
                     'Bolivia',
                     'Chile',
                     'Colombia',
                     'Ecuador',
                     'Paraguay',
                     'Peru',
                     'Uruguay',
                     'Venezuela',
                     'Mexico',
                     'Honduras',
                     'Panama',
                     'Guatemala',
                     'Costa Rica',
                     'El Salvador',
                     'Dominican Republic',
                     'Nicaragua')
country_coord <- country_coord[latam_countries, ]
####Figure 2####
# pdf(file = 'Figures/PDF/figure2.pdf')
map1 <-
  mapCountryData(
    taxbase_map,
    nameColumnToPlot = 'taxthreshold',
    mapRegion = "Latin America",
    catMethod = seq(3.95,7.41,by = 0.01),
    colourPalette = hcl.colors(n = 346,palette = "Emrld", rev = TRUE),
    mapTitle = ""
  )
text(x = country_coord$X1,y = country_coord$X2, labels = row.names(country_coord), cex = 0.5)
# dev.off()
#option 2 - masstax
taxbase_broad <-
  tax %>% group_by(country) %>% summarize(
    n = n(),
    masstax = mean(masstax)*100,
    classtax = mean(classtax)*100
  )
#visualising data
taxbase_broad_map <-
  joinCountryData2Map(taxbase_broad,
                      joinCode = 'NAME',
                      nameJoinColumn = 'country')
####Figure A1####
# pdf(file = 'Figures/PDF/figurea1.pdf')
map2 <-
  mapCountryData(
    taxbase_broad_map,
    nameColumnToPlot = 'masstax',
    mapRegion = "Latin America",
    catMethod = seq(23.6,68,by = 0.01),
    colourPalette = hcl.colors(n = 4440,palette = "Emrld"),
    mapTitle = ""
  )
text(x = country_coord$X1,y = country_coord$X2, labels = row.names(country_coord), cex = 0.5)
# dev.off()

#comparison 
taxbase_broad %>% arrange(desc(masstax))
taxbase_broad %>% arrange(desc(classtax))
taxbase %>% arrange(taxthreshold)
tax %>% summarize(mean = mean(classtax))
tax %>% summarize(mean = mean(masstax))
table(tax$masstax)

#loading actual inequality data
ineqact <-
  read_csv('actual_inequality.csv',skip = 2) 
ineqact <- ineqact %>% dplyr::mutate(gini_coef = gini_index / 100)
#add new Gini calculation, giniq, based on income quintiles
#to compare with the subjective Gini 
ineqact <-
  ineqact %>% dplyr::mutate(giniq = apply(ineqact[,c('q5actual','q4actual','q3actual','q2actual','q1actual')],1,DescTools::Gini,unbiased = FALSE))
tax <- tax %>% left_join(dplyr::select(ineqact,gini_coef,giniq,country))

#creating the subjective Gini coefficient
#investigate by variable to look into no responses
table(tax$q5per)
table(tax$q4per)
table(tax$q3per)
table(tax$q2per)
table(tax$q1per)

#excluding observations in which all five variables affecting the subjective Gini coefficient are missing
tax %>% dplyr::filter(q5per < 0 & q4per < 0 & q3per < 0 & q2per < 0 & q1per < 0) %>% count()
ineqtax <- tax %>% dplyr::filter(!(q5per < 0 & q4per < 0 & q3per < 0 & q2per < 0 & q1per < 0)) %>% dplyr::mutate(sumper = q1per + q2per + q3per + q4per + q5per)


#excluding observations where it is estimated that a poorer group has more money than richer group (according to the instructions in the questionnaire)
#this also works when no responses (coded originally as -2) are referred to as 0
#(in robustness checks Table A11, start commenting out from the line below)
tax %>% dplyr::filter(q1per > q2per | q1per > q3per | q1per > q4per | q1per > q5per |
                        q2per > q3per | q2per > q4per | q2per > q5per |
                        q3per > q4per | q3per > q5per |
                        q4per > q5per) %>% count()

ineqtax <- ineqtax %>% dplyr::filter(!(q1per > q2per | q1per > q3per | q1per > q4per | q1per > q5per |
                                         q2per > q3per | q2per > q4per | q2per > q5per |
                                         q3per > q4per | q3per > q5per |
                                         q4per > q5per))

#first approach for dealing with missing values - keeping all observations besides those with missing data in all five quintiles
#start commenting out from the line below for robustness check in Table A10 
ineqtax <- ineqtax %>% dplyr::mutate(ineqna = as.numeric(q5per == -2) + as.numeric(q4per == -2) + as.numeric(q3per == -2) + as.numeric(q2per == -2) + as.numeric(q1per == -2))
table(ineqtax$ineqna)
ineqtax %>% dplyr::filter(dplyr::between(ineqna,1,4)) %>% count()
#example
ineqtax %>% dplyr::filter(ineqna == 4) %>% distinct(q1per,q2per,q3per,q4per,q5per)

#changing no answer (coded as -2) to 0
ineqtax <- ineqtax %>% dplyr::mutate(q1per = ifelse(q1per == -2,0,q1per),
                                     q2per = ifelse(q2per == -2,0,q2per),
                                     q3per = ifelse(q3per == -2,0,q3per),
                                     q4per = ifelse(q4per == -2,0,q4per),
                                     q5per = ifelse(q5per == -2,0,q5per))
#sum across all five groups
ineqtax <- ineqtax %>% dplyr::mutate(sumper = q1per + q2per + q3per + q4per + q5per)
ineqtax %>% dplyr::filter(dplyr::between(ineqna,1,4) & sumper == 100) %>% count()
ineqtax %>% dplyr::filter(sumper != 100) %>% count()
ineqtax %>% dplyr::filter(sumper != 100) %>% distinct(q1per,q2per,q3per,q4per,q5per)
ineqtax %>% dplyr::filter(ineqna == 4) %>% dplyr::select(q1per,q2per,q3per,q4per,q5per)
#(end comment out in robustness checks Table A10,A11)

#second approach for dealing with missing values - excluding observations with missing variables for one (or more) of quintiles
#the assignment below should be included in robustness checks Tables A10-A11
# ineqtax <- ineqtax %>% dplyr::filter(!(q5per < 0 | q4per < 0 | q3per < 0 | q2per < 0 | q1per < 0)) %>%
#   dplyr::mutate(sumper = q1per + q2per + q3per + q4per + q5per)
# ineqtax %>% dplyr::filter(sumper != 100) %>% count()

# ineqtax <- ineqtax %>% dplyr::filter(sumper == 100) #should be included in Table A11

#calculating Gini coefficient for each observation
ineqtax$giniper <- apply(ineqtax[,c('q1per','q2per','q3per','q4per','q5per')],1,DescTools::Gini,unbiased = FALSE) 
summary(ineqtax$giniper)
table(ineqtax$giniper)

#example - demonstration
ineqtax %>% dplyr::filter(q1per == 5 & q2per == 10 & q3per == 15 & q4per == 20 & q5per == 50) %>% dplyr::select(giniper)
ex1 <- Lc(c(5,10,15,20,50))
ex2 <- Lc(c(20,20,20,20,20))
ex3 <- Lc(c(0,0,0,0,100))

#changing visualisation settings to present three figures in the same row 
####Figure A2####
# pdf(file = 'Figures/PDF/figurea2.pdf',width = 12, height = 7)
par(mfrow = c(1,3))
plot(ex1,main = '(a) Subjective Gini = 0.4 (5,10,15,20,50)',xlab = 'Cumulative share of population',y = 'Cumulative share of wealth',
     cex.axis = 1.25,cex.lab = 1.5,cex.main = 1.5)
text(x = 0.55,y = 0.4,"A",cex = 3)
text(x = 0.8,y = 0.2,"B",cex = 3)
points(x = c(0.2,0.4,0.6,0.8),y = c(0.05,0.15,0.3,0.5),cex = 2)
plot(ex2,main = '(b) Subjective Gini = 0 (20,20,20,20,20)',xlab = 'Cumulative share of population',y = 'Cumulative share of wealth',
     cex.axis = 1.25,cex.lab = 1.5,cex.main = 1.5)
points(x = c(0.2,0.4,0.6,0.8),y = c(0.2,0.4,0.6,0.8),cex = 2)
plot(ex3,main = '(c) Subjective Gini = 0.8 (0,0,0,0,100)',xlab = 'Cumulative share of population',y = 'Cumulative share of wealth',
     cex.axis = 1.25,cex.lab = 1.5,cex.main = 1.5)
points(x = c(0.2,0.4,0.6,0.8),y = c(0,0,0,0),cex = 2)
# dev.off()

#changing back to the default
par(mfrow = c(1,1))

#plot histogram 
ineqtax %>% ggplot(aes(x = giniper)) +
  geom_histogram(bins = 30) 

#plot histogram by country
theme_set(theme_bw())
ineqtax %>% ggplot(aes(x = giniper)) +
  geom_histogram(bins = 30, alpha = 0.5) +
  theme(strip.text.x = element_text(size = 14),legend.position = "none") +
  labs(x = 'Subjective Gini coefficient', y = '') +
  facet_wrap(~ country)

#plot density by country
theme_set(theme_bw())
####Figure 3####
# pdf(file = 'Figures/PDF/figure3.pdf',width = 10)
ineqtax %>% ggplot(aes(x = giniper)) +
  geom_density(linetype = "dashed",fill = 'dodgerblue3') +
  theme(strip.text.x = element_text(size = 14),legend.position = "none") +
  labs(x = 'Subjective Gini coefficient', y = '') +
  facet_wrap(~ country) 
# dev.off()

#actual (income) and subjective (perceived) Gini coefficients by country;
#differences between actual inequality and mean subjective (perceived) inequality

ineqcountry <- ineqtax %>% group_by(country) %>%
  summarize(ginipermean = mean(giniper),ginipermedian = median(giniper)) %>%
  left_join(dplyr::select(ineqact, gini_coef, giniq, country)) %>%
  dplyr::mutate(diff = gini_coef - ginipermean, diffq = giniq - ginipermean)
ineqcountry <-
  ineqcountry %>% left_join(distinct(tax, region, country)) %>%
  relocate(region,.after = country)
ineqcountry %>% arrange(desc(ginipermean))
ineqcountry %>% arrange(desc(diff))
ineqcountry %>% arrange(desc(diffq))

#visualise subjective Gini coefficient by country
#order by mean 
# theme_set(theme_light())
####FIgure A3####
# pdf(file = 'Figures/PDF/figurea3.pdf', width = 10)
ineqcountry %>% ggplot(aes(x = reorder(country,ginipermean), y = ginipermean)) +
  geom_col(width = 0.8,alpha = 0.8,fill = 'darkblue') +
  geom_text(size = 5,aes(label = round(ginipermean,2), hjust = -0.1)) +
  coord_flip() +
  ylim(0,1) +
  labs(x = ' ', y = 'Subjective Gini coefficient') +
  theme(axis.text = element_text(size = 15),axis.title = element_text(size = 15)) 
# dev.off()

#order by median
ineqcountry %>% ggplot(aes(x = reorder(country,ginipermedian), y = ginipermedian)) +
  geom_col(fill = 'darkblue') +
  geom_text(aes(label = round(ginipermedian,2), hjust = -0.1)) +
  coord_flip() +
  ylim(0,1) +
  labs(x = ' ', y = 'Subjective Gini coefficient',title = '(B): Median subjective Gini coefficient') +
  theme(axis.text = element_text(size = 12)) 


#comparison between actual Gini (based on quintiles of income) and subjective gini 
#importantly, subjective Gini coefficient is based on a question asking about money 
#(which can be seen more as a proxy of wealth) 
####Figure A4####
# pdf(file = 'Figures/PDF/figurea4.pdf',width = 10)
ineqcountry %>%
  ggplot(aes(x = giniq, y = ginipermean, label = country)) +
  geom_text() +
  geom_point(size = 0.25) +
  xlim(0.2,0.6) +
  ylim(0.2,0.6) +
  labs(x = 'Gini coefficient (based on quintiles of income)', y = 'Mean subjective Gini coefficient') +
  geom_abline(slope = 1) +
  theme_bw()
# dev.off()


####Regression analysis ####

#linear regression with fixed effects
table(ineqtax$taxthreshold)
m1 <-
  lm(taxthreshold ~ giniper + factor(country),
     data = ineqtax)
summary(m1)

#no fixed effects
m1_nf <-
  lm(taxthreshold ~ giniper,
     data = ineqtax)
summary(m1_nf)

#logistic regression
m2 <-
  glm(masstax ~ giniper + factor(country),
      family = binomial,
      data = ineqtax)
summary(m2)

#no fixed effects
m2_nf <-
  glm(masstax ~ giniper,
      family = binomial,
      data = ineqtax)
summary(m2_nf)


m3 <-
  glm(classtax ~ giniper + factor(country),
      family = binomial,
      data = ineqtax)
summary(m3)

#no fixed effects
m3_nf <-
  glm(classtax ~ giniper,
      family = binomial,
      data = ineqtax)
summary(m3_nf)


#### Extended model - adding control variables ####
ineqtaxext <-
  ineqtax %>% dplyr::filter(!(subincome < 1 | polideo == 97 | polideo < 0 | supdemoc < 1 | govtrust < 1 | satdemoc < 1 | socialclass < 1 |
                                juditrust < 1 | congpartrust < 1 | educgroup < 1))
#summary statistics
summarystats <- ineqtaxext %>% dplyr::select(masstax,classtax,taxgroup,taxthreshold,giniper,subincome,socialclass,polideo,supdemoc,satdemoc,govtrust,juditrust,congpartrust,educgroup,age,female)
####Table A1####
stargazer(summarystats,title = 'Summary statistics',covariate.labels = c('Masstax','Classtax','Taxgroup','Taxthreshold','Subjective Gini','Subjective Household Income','Subjective Social Class','Political Ideology ','Support for Democracy','Satisfaction with Democracy','Trust in Government','Trust in Judiciary','Trust in Congress/Parliament','Education Group','Age','Female'))

#Separate analysis to examine balance between main control variables 
tax.b <- tax.b %>%
  dplyr::mutate(models_wc = as.numeric(taxgroup != -5 & !(q5per < 0 & q4per < 0 & q3per < 0 & q2per < 0 & q1per < 0) &
                                         !(q1per > q2per | q1per > q3per | q1per > q4per | q1per > q5per |
                                             q2per > q3per | q2per > q4per | q2per > q5per |
                                             q3per > q4per | q3per > q5per |
                                             q4per > q5per) == 1 & (!(subincome < 1 | polideo == 97 | polideo < 0 | supdemoc < 1 | govtrust < 1 | satdemoc < 1 | socialclass < 1 |
                                                                        juditrust < 1 | congpartrust < 1 | educgroup < 1))))

table(tax.b$models_wc)
summaryfull <- tax.b %>% dplyr::mutate(subincome_full = ifelse(subincome < 1,NA,subincome),
                                       socialclass_full = ifelse(socialclass < 1, NA, socialclass),
                                       educgroup_full = ifelse(educgroup < 1, NA, educgroup),
                                       polideo_full = ifelse(polideo == 97 | polideo < 0, NA, polideo))
summarymodels_wc <- tax.b %>% dplyr::filter(models_wc == 1)
tax.b %>% dplyr::filter(polideo < 0 | polideo == 97)  %>% count()

summaryfull_n <- summaryfull %>%
  summarize(across(.cols = c(age,female,educgroup_full,subincome_full,socialclass_full,polideo_full),
                   .fns = ~sum(!is.na(.))))
colnames(summaryfull_n) <- c('age','female','educgroup','subincome','socialclass','polideo')
summaryfull_n <- summaryfull_n %>% pivot_longer(1:6,names_to = 'variable',values_to = 'n')
summaryfull_mean <- summaryfull %>% summarize(age = mean(age),female = mean(female),educgroup = mean(educgroup_full,na.rm = TRUE),subincome = mean(subincome_full,na.rm = TRUE),socialclass = mean(socialclass_full,na.rm = TRUE),polideo = mean(polideo_full,na.rm = TRUE))
summaryfull_mean <- summaryfull_mean %>% pivot_longer(1:6,names_to = 'variable',values_to = 'mean')
summaryfull_sd <- summaryfull %>% summarize(age = sd(age),female = sd(female),educgroup = sd(educgroup_full,na.rm = TRUE),subincome = sd(subincome_full,na.rm = TRUE),socialclass = sd(socialclass_full,na.rm = TRUE),polideo = sd(polideo_full,na.rm = TRUE))
summaryfull_sd <- summaryfull_sd %>% pivot_longer(1:6,names_to = 'variable',values_to = 'sd')

summarymodels_wc_n <- summarymodels_wc %>%
  summarize(across(.cols = c(age,female,educgroup,subincome,socialclass,polideo),.fns = ~sum(!is.na(.))))
summarymodels_wc_n <- summarymodels_wc_n %>% pivot_longer(1:6,names_to = 'variable',values_to = 'n')
summarymodels_wc_mean <- summarymodels_wc %>% summarize(age = mean(age),female = mean(female),educgroup = mean(educgroup),subincome = mean(subincome),socialclass = mean(socialclass),polideo = mean(polideo))
summarymodels_wc_mean <- summarymodels_wc_mean %>% pivot_longer(1:6,names_to = 'variable',values_to = 'mean')
summarymodels_wc_sd <- summarymodels_wc %>% summarize(age = sd(age),female = sd(female),educgroup = sd(educgroup),subincome = sd(subincome),socialclass = sd(socialclass),polideo = sd(polideo))
summarymodels_wc_sd <- summarymodels_wc_sd %>% pivot_longer(1:6,names_to = 'variable',values_to = 'sd')

varcomp <- summaryfull_n %>% left_join(summaryfull_mean) %>% left_join(summaryfull_sd) %>%
  left_join(dplyr::select(summarymodels_wc_n,n,variable),by = 'variable') %>% left_join(dplyr::select(summarymodels_wc_mean,mean,variable),by = 'variable') %>%
  left_join(dplyr::select(summarymodels_wc_sd,sd,variable),by = 'variable')
varcomp <- varcomp %>% dplyr::mutate(across(where(is.numeric),~round(.,2)))
varcomp <- varcomp %>% dplyr::mutate(variable = c('Age','Female','Education Group','Subjective Household Income',
                                                  'Subjective Social class','Political Ideology '))
####Table A2####
stargazer(varcomp,title = 'Balance of main control variables',covariate.labels = c('Variable','N','Mean','SD','N','Mean','SD'),summary = FALSE,rownames = FALSE,
          font.size = 'small')

m4 <-
  lm(
    taxthreshold ~ giniper + subincome + I(subincome^2) + socialclass + polideo + supdemoc + satdemoc + govtrust + 
      juditrust + congpartrust + educgroup + age + I(age^2) + female + factor(country),
    data = ineqtaxext
  )
summary(m4)

#no fixed effects
m4_nf <-
  lm(
    taxthreshold ~ giniper + subincome + I(subincome^2) + socialclass + polideo + supdemoc + satdemoc + govtrust + 
      juditrust + congpartrust + educgroup + age + I(age^2) + female,
    data = ineqtaxext
  )
summary(m4_nf)


m5 <-
  glm(masstax ~ giniper + subincome + I(subincome^2) + socialclass + polideo + supdemoc + satdemoc + govtrust + 
        juditrust + congpartrust + educgroup + age + I(age^2) + female + factor(country),
      family = binomial,
      data = ineqtaxext
  )
summary(m5)

#no fixed effects
m5_nf <-
  glm(masstax ~ giniper + subincome + I(subincome^2) + socialclass + polideo + supdemoc + satdemoc + govtrust + 
        juditrust + congpartrust + educgroup + age + I(age^2) + female,
      family = binomial,
      data = ineqtaxext
  )
summary(m5_nf)


#calculating implications of 1 standard deviation increase in subjective gini for mean value of taxthreshold
round((mean(ineqtaxext$taxthreshold) + (sd(ineqtaxext$giniper)*0.8508480)) / mean(ineqtaxext$taxthreshold),3)
viz1 <- effect_plot(m4,pred = 'giniper',interval = TRUE)
viz1
viz2 <- effect_plot(m5,pred = 'giniper',interval = TRUE,main.title = '(a) Support for mass taxation',x.label = 'Subjective Gini coefficient',y.label = 'Predicted probability of support for mass taxation')
viz2 <- viz2 + theme(axis.text.x = element_text(size = 12),
                     axis.text.y = element_text(size = 12)) + ylim(0.1,0.5)
viz2

m6 <-
  glm(classtax ~ giniper + subincome + I(subincome^2) + socialclass + polideo + supdemoc + satdemoc + govtrust + 
        juditrust + congpartrust + educgroup + age + I(age^2) + female + factor(country),
      family = binomial,
      data = ineqtaxext
  )
summary(m6)

#no fixed effects
m6_nf <-
  glm(classtax ~ giniper + subincome + I(subincome^2) + socialclass + polideo + supdemoc + satdemoc + govtrust + 
        juditrust + congpartrust + educgroup + age + I(age^2) + female,
      family = binomial,
      data = ineqtaxext
  )
summary(m6_nf)
viz3 <- effect_plot(m6,pred = 'giniper',interval = TRUE,main.title = '(b) Support for class taxation',x.label = 'Subjective Gini coefficient',y.label = 'Predicted probability of support for class taxation')
viz3 <- viz3 + theme(axis.text.x = element_text(size = 12),
                     axis.text.y = element_text(size = 12)) + ylim(0.1,0.5)
viz3
####Figure 4####
# pdf(file = 'Figures/PDF/figure4.pdf',width = 10)
ggarrange(viz2,viz3)
# dev.off()

####Table 1####
stargazer(m1,m4,m2,m5,m3,m6,
          title = 'Determinants of preferences over the size of the tax base',
          dep.var.labels = c('taxthreshold','masstax','classtax'),
          keep = c('giniper','subincome','I(subincome^2)','socialclass','polideo','supdemoc','satdemoc','govtrust','juditrust','congpartrust','educgroup','age','I(age^2)','female','Constant'),
          covariate.labels = c("Subjective Gini","Subjective Household Income","Subjective Household Income^2","Subjective Social Class","Political Ideology ","Support for Democracy","Satisfaction with Democracy","Trust in Government","Trust in Judiciary","Trust in Congress/Parliament","Education Group","Age","Age^2","Female"),
          model.names = FALSE,
          dep.var.caption = 'Dependent variable',
          omit.stat = c("f","ser","rsq"),
          font.size = 'scriptsize',
          star.cutoffs = c(0.05,0.01,0.001))


#Ideology, social class, and perceived inequality
idetax <- 
  lm(
    taxthreshold ~ polideo + factor(country),
    data = ineqtaxext
  )
summary(idetax)

inper1 <- 
  lm(
    giniper ~ polideo + socialclass + factor(country),
    data = ineqtaxext
  )
summary(inper1)

inper2 <- 
  lm(
    giniper ~ polideo + socialclass + educgroup + age + I(age^2) + female + factor(country),
    data = ineqtaxext
  )
summary(inper1)

sctax.a <-
  lm(
    taxthreshold ~ subincome + I(subincome^2) + socialclass + polideo + supdemoc + satdemoc + govtrust + 
      juditrust + congpartrust + educgroup + age + I(age^2) + female + factor(country),
    data = ineqtaxext
  )
summary(sctax.a)

sctax.b <-
  lm(
    taxthreshold ~  giniper + subincome + I(subincome^2) + socialclass + polideo +supdemoc + satdemoc + govtrust + 
      juditrust + congpartrust + educgroup + age + I(age^2) + female + factor(country),
    data = ineqtaxext
  )
summary(sctax.b)


####Table A3####
stargazer(idetax,inper1,inper2,sctax.a,sctax.b,
          title = 'Perceived inequality, ideology, social class and tax policy preferences',
          keep = c('polideo','socialclass','giniper','educgroup','age','I(age^2)','female',"Constant"),
          covariate.labels = c("Subjective Gini","Political Ideology","Subjective Social Class","Education Group","Age","Age^2","Female","Constant"),
          model.names = FALSE,
          dep.var.labels = c("taxthreshold","Subjective Gini","taxthreshold"),
          dep.var.caption = 'Dependent variable',
          add.lines = list(c('Additional Control variables','No','No','No','Yes','Yes')),
          omit.stat = c("f","ser","rsq"),
          star.cutoffs = c(0.05,0.01,0.001),
          font.size = 'footnotesize')


####Robustness checks####


#Gradual addition of control variables####

##taxthreshold##
ineqtaxext2 <-
  ineqtax %>% dplyr::filter(!(govtrust < 1 |  juditrust < 1 | congpartrust < 1 | educgroup < 1))

m4gr.a <-
  lm(taxthreshold ~ giniper + govtrust +  juditrust + congpartrust + educgroup + age + I(age^2) + female + factor(country),
     data = ineqtaxext2
  )
summary(m4gr.a)
ineqtaxext2 <-
  ineqtaxext2 %>% dplyr::filter(!satdemoc < 1)


m4gr.b <-
  lm(taxthreshold ~ giniper + satdemoc + govtrust +
       juditrust + congpartrust + educgroup + age + I(age^2) + female + factor(country),
     data = ineqtaxext2
  )
summary(m4gr.b)

ineqtaxext2 <-
  ineqtaxext2 %>% dplyr::filter(!socialclass < 1)

m4gr.c <-
  lm(taxthreshold ~ giniper + socialclass + satdemoc + govtrust +
       juditrust + congpartrust + educgroup + age + I(age^2) + female + factor(country),
     data = ineqtaxext2
  )
summary(m4gr.c)

ineqtaxext2 <-
  ineqtaxext2 %>% dplyr::filter(!supdemoc <  1)

m4gr.d <-
  lm(taxthreshold ~ giniper + socialclass + supdemoc + satdemoc + govtrust +
       juditrust + congpartrust + educgroup + age + I(age^2) + female + factor(country),
     data = ineqtaxext2
  )
summary(m4gr.d)

ineqtaxext2 <-
  ineqtaxext2 %>% dplyr::filter(!subincome <  1)

m4gr.e <-
  lm(taxthreshold ~ giniper + subincome + I(subincome^2) + socialclass + supdemoc + satdemoc + govtrust +
       juditrust + congpartrust + educgroup + age + I(age^2) + female + factor(country),
     data = ineqtaxext2
  )
summary(m4gr.e)

ineqtaxext2 <-
  ineqtaxext2 %>% dplyr::filter(!(polideo == 97 | polideo < 0))

m4gr.f <-
  lm(taxthreshold ~ giniper + subincome + I(subincome^2) + socialclass + polideo + supdemoc + satdemoc + govtrust +
       juditrust + congpartrust + educgroup + age + I(age^2) + female + factor(country),
     data = ineqtaxext2
  )
summary(m4gr.f)

####Table A4####
stargazer(m4gr.a,m4gr.b,m4gr.c,m4gr.d,m4gr.e,m4gr.f,
          title = 'Gradual addition of control variables (dep. variable: taxthreshold)',
          keep = c('giniper','subincome','I(subincome^2)','socialclass','polideo','supdemoc','satdemoc','govtrust','juditrust','congpartrust','educgroup','age','I(age^2)','female','Constant'),
          covariate.labels = c("Subjective Gini","Subjective Household Income","Subjective Household Income^2","Subjective Social Class","Political Ideology ","Support for Democracy","Satisfaction with Democracy","Trust in Government","Trust in Judiciary","Trust in Congress/Parliament","Education Group","Age","Age^2","Female"),
          model.names = FALSE,
          omit.stat = c("f","ser","rsq"),
          font.size = 'scriptsize',
          dep.var.labels.include = FALSE,
          dep.var.caption = 'Dependent variable: taxthreshold',
          star.cutoffs = c(0.05,0.01,0.001))

###masstax##

ineqtaxext2 <-
  ineqtax %>% dplyr::filter(!(govtrust < 1 |  juditrust < 1 | congpartrust < 1 | educgroup < 1))

m5gr.a <-
  glm(masstax ~ giniper + govtrust +  juditrust + congpartrust + educgroup + age + I(age^2) + female + factor(country),
      family = binomial,
      data = ineqtaxext2
  )
summary(m5gr.a)

ineqtaxext2 <-
  ineqtaxext2 %>% dplyr::filter(!satdemoc < 1)


m5gr.b <-
  glm(masstax ~ giniper + satdemoc + govtrust +
        juditrust + congpartrust + educgroup + age + I(age^2) + female + factor(country),
      family = binomial,
      data = ineqtaxext2
  )
summary(m5gr.b)

ineqtaxext2 <-
  ineqtaxext2 %>% dplyr::filter(!socialclass < 1)

m5gr.c <-
  glm(masstax ~ giniper + socialclass + satdemoc + govtrust +
        juditrust + congpartrust + educgroup + age + I(age^2) + female + factor(country),
      family = binomial,
      data = ineqtaxext2
  )
summary(m5gr.c)

ineqtaxext2 <-
  ineqtaxext2 %>% dplyr::filter(!supdemoc <  1)

m5gr.d <-
  glm(masstax ~ giniper + socialclass + supdemoc + satdemoc + govtrust +
        juditrust + congpartrust + educgroup + age + I(age^2) + female + factor(country),
      family = binomial,
      data = ineqtaxext2
  )
summary(m5gr.d)

ineqtaxext2 <-
  ineqtaxext2 %>% dplyr::filter(!subincome <  1)

m5gr.e <-
  glm(masstax ~ giniper + subincome + I(subincome^2) + socialclass + supdemoc + satdemoc + govtrust +
        juditrust + congpartrust + educgroup + age + I(age^2) + female + factor(country),
      family = binomial,
      data = ineqtaxext2
  )
summary(m5gr.e)

ineqtaxext2 <-
  ineqtaxext2 %>% dplyr::filter(!(polideo == 97 | polideo < 0))

m5gr.f <-
  glm(masstax ~ giniper + subincome + I(subincome^2) + socialclass + polideo + supdemoc + satdemoc + govtrust +
        juditrust + congpartrust + educgroup + age + I(age^2) + female + factor(country),
      family = binomial,
      data = ineqtaxext2
  )
summary(m5gr.f)

####Table A5####
stargazer(m5gr.a,m5gr.b,m5gr.c,m5gr.d,m5gr.e,m5gr.f,
          title = 'Gradual addition of control variables (dep. varaible: masstax)',
          keep = c('giniper','subincome','I(subincome^2)','socialclass','polideo','supdemoc','satdemoc','govtrust','juditrust','congpartrust','educgroup','age','I(age^2)','female','Constant'),
          covariate.labels = c("Subjective Gini","Subjective Household Income","Subjective Household Income^2","Subjective Social Class","Political Ideology ","Support for Democracy","Satisfaction with Democracy","Trust in Government","Trust in Judiciary","Trust in Congress/Parliament","Education Group","Age","Age^2","Female"),
          model.names = FALSE,
          omit.stat = c("f","ser","rsq"),
          font.size = 'scriptsize',
          dep.var.labels.include = FALSE,
          dep.var.caption = 'Dependent variable: masstax',
          star.cutoffs = c(0.05,0.01,0.001))

##classtax##


ineqtaxext2 <-
  ineqtax %>% dplyr::filter(!(govtrust < 1 |  juditrust < 1 | congpartrust < 1 | educgroup < 1))

m6gr.a <-
  glm(classtax ~ giniper + govtrust +  juditrust + congpartrust + educgroup + age + I(age^2) + female + factor(country),
      family = binomial,
      data = ineqtaxext2
  )
summary(m6gr.a)

ineqtaxext2 <-
  ineqtaxext2 %>% dplyr::filter(!satdemoc < 1)


m6gr.b <-
  glm(classtax ~ giniper + satdemoc + govtrust +
        juditrust + congpartrust + educgroup + age + I(age^2) + female + factor(country),
      family = binomial,
      data = ineqtaxext2
  )
summary(m6gr.b)

ineqtaxext2 <-
  ineqtaxext2 %>% dplyr::filter(!socialclass < 1)

m6gr.c <-
  glm(classtax ~ giniper + socialclass + satdemoc + govtrust +
        juditrust + congpartrust + educgroup + age + I(age^2) + female + factor(country),
      family = binomial,
      data = ineqtaxext2
  )
summary(m6gr.c)

ineqtaxext2 <-
  ineqtaxext2 %>% dplyr::filter(!supdemoc <  1)

m6gr.d <-
  glm(classtax ~ giniper + socialclass + supdemoc + satdemoc + govtrust +
        juditrust + congpartrust + educgroup + age + I(age^2) + female + factor(country),
      family = binomial,
      data = ineqtaxext2
  )
summary(m6gr.d)

ineqtaxext2 <-
  ineqtaxext2 %>% dplyr::filter(!subincome <  1)

m6gr.e <-
  glm(classtax ~ giniper + subincome + I(subincome^2) + socialclass + supdemoc + satdemoc + govtrust +
        juditrust + congpartrust + educgroup + age + I(age^2) + female + factor(country),
      family = binomial,
      data = ineqtaxext2
  )
summary(m6gr.e)

ineqtaxext2 <-
  ineqtaxext2 %>% dplyr::filter(!(polideo == 97 | polideo < 0))

m6gr.f <-
  glm(classtax ~ giniper + subincome + I(subincome^2) + socialclass + polideo + supdemoc + satdemoc + govtrust +
        juditrust + congpartrust + educgroup + age + I(age^2) + female + factor(country),
      family = binomial,
      data = ineqtaxext2
  )
summary(m6gr.f)

####Table A6####
stargazer(m6gr.a,m6gr.b,m6gr.c,m6gr.d,m6gr.e,m6gr.f,
          title = 'Gradual addition of control variables (dep. variable: classtax)',
          keep = c('giniper','subincome','I(subincome^2)','socialclass','polideo','supdemoc','satdemoc','govtrust','juditrust','congpartrust','educgroup','age','I(age^2)','female','Constant'),
          covariate.labels = c("Subjective Gini","Subjective Household Income","Subjective Household Income^2","Subjective Social Class","Political Ideology ","Support for Democracy","Satisfaction with Democracy","Trust in Government","Trust in Judiciary","Trust in Congress/Parliament","Education Group","Age","Age^2","Female"),
          model.names = FALSE,
          omit.stat = c("f","ser","rsq"),
          font.size = 'scriptsize',
          dep.var.labels.include = FALSE,
          dep.var.caption = 'Dependent variable: classtax',
          star.cutoffs = c(0.05,0.01,0.001))

####Multiple imputation####
ineqtax %>% dplyr::filter(city < 0)
ineqtax2 <- ineqtax %>% dplyr::select(-c(year,region,ineqna,sumper,inqtolerance,incomeinqworst,richpoorworst,q1per,q2per,q3per,q4per,q5per)) %>% dplyr::mutate(subincome = ifelse(subincome < 1,NA,subincome),
                                                                                                                                                               polideo = ifelse(polideo == 97 | polideo < 0,NA,polideo),
                                                                                                                                                               supdemoc = ifelse(supdemoc < 1,NA,supdemoc),
                                                                                                                                                               govtrust = ifelse(govtrust < 1,NA,govtrust),
                                                                                                                                                               satdemoc = ifelse(satdemoc < 1,NA,satdemoc),
                                                                                                                                                               socialclass = ifelse(socialclass < 1,NA,socialclass),
                                                                                                                                                               juditrust = ifelse(juditrust < 1,NA,juditrust),
                                                                                                                                                               congpartrust = ifelse(congpartrust < 1,NA,congpartrust),
                                                                                                                                                               educgroup = ifelse(educgroup < 1,NA,educgroup),
                                                                                                                                                               citysize = ifelse(citysize < 1,NA,citysize),
                                                                                                                                                               city = ifelse(city < 1,NA,city),
                                                                                                                                                               countryregion = ifelse(countryregion < 1,NA,countryregion),
                                                                                                                                                               unfairness.dist = ifelse(unfairness.dist < 1,NA,unfairness.dist),
                                                                                                                                                               fightcrime = ifelse(fightcrime < 1,NA,fightcrime),
                                                                                                                                                               localauthority = ifelse(localauthority < 1,NA,localauthority),
                                                                                                                                                               trustpubhosp = ifelse(trustpubhosp < 1,NA,trustpubhosp),
                                                                                                                                                               complaint = ifelse(complaint < 1,NA,complaint),
                                                                                                                                                               meritocracy = ifelse(meritocracy < 0,NA,meritocracy),
                                                                                                                                                               taxprog = ifelse(taxprog < 1, NA,taxprog))
ineqtax2.imp <- mice(ineqtax2, m = 5,method = 'cart')
plot(ineqtax2.imp)
summary(ineqtax2.imp)
m4.imp <- with(ineqtax2.imp,
               lm(taxthreshold ~ giniper + subincome + I(subincome^2) + socialclass + polideo + supdemoc + satdemoc + govtrust +
                    juditrust + congpartrust + educgroup + age + I(age^2) + female + factor(country)))
summary(m4.imp)
m4.imp.p <- pool(m4.imp)
summary(m4.imp.p)

m5.imp <- with(ineqtax2.imp,
               glm(masstax ~ giniper + subincome + I(subincome^2) + socialclass + polideo + supdemoc + satdemoc + govtrust +
                     juditrust + congpartrust + educgroup + age + I(age^2) + female + factor(country),
                   family = binomial))
summary(m5.imp)
m5.imp.p <- pool(m5.imp)
summary(m5.imp.p)

m6.imp <- with(ineqtax2.imp,
               glm(classtax ~ giniper + subincome + I(subincome^2) + socialclass + polideo + supdemoc + satdemoc + govtrust +
                     juditrust + congpartrust + educgroup + age + I(age^2) + female + factor(country),
                   family = binomial))
summary(m6.imp)
m6.imp.p <- pool(m6.imp)
summary(m6.imp.p)

library(modelsummary)
row.obs <-  data.frame('Observations','10618','10618','10618')

####Table A7####
modelsummary(models = list('taxthreshold' = m4.imp.p,'masstax' = m5.imp.p,'classtax' = m6.imp.p),fmt = 3,stars = c('*' = .05,'**' = .01,'***' = .001),
             coef_omit = c(1,16:32),
             coef_rename = c("Subjective Gini","Subjective Household Income","Subjective Household Income^2","Subjective Social Class","Political Ideology ","Support for Democracy","Satisfaction with Democracy","Trust in Government","Trust in Judiciary","Trust in Congress/Parliament","Education Group","Age","Age^2","Female"),
             gof_map = "adj.r.squared",
             output = 'latex',
             title = 'Results of models with multiple imputation',
             add_rows = row.obs)

#Robustness check for the definition of the variable taxthreshold####
#by excluding the values 11 and 12
ineqtax_th <- ineqtax %>% dplyr::filter(dplyr::between(taxthreshold,1,10))
ineqtaxext_th <- ineqtaxext %>% dplyr::filter(dplyr::between(taxthreshold,1,10))
m1_th <-
  lm(taxthreshold ~ giniper + factor(country),
     data = ineqtax_th)
summary(m1_th)


m4_th <-
  lm(
    taxthreshold ~ giniper + subincome + I(subincome^2) + socialclass + polideo + supdemoc + satdemoc + govtrust +
      juditrust + congpartrust + educgroup + age + I(age^2) + female + factor(country),
    data = ineqtaxext_th
  )
summary(m4_th)

####Table A8####
stargazer(m1_th,m4_th,
          title = 'Robustness checks - definition of taxthreshold',
          keep = 'giniper',
          covariate.labels = "Subjective Gini",
          dep.var.caption = 'Dependent variable: taxthreshold',
          dep.var.labels.include = FALSE,
          add.lines = list(c('Control variables','No','Yes')),
          omit.stat = c("f","ser","rsq"),
          star.cutoffs = c(0.05,0.01,0.001))


#Sensitivity checks with different threshold levels for mass taxation####
ineqtax <-
  ineqtax %>% dplyr::mutate(
    masstax4 = ifelse(dplyr::between(taxgroup, 1, 4), 1, ifelse(taxgroup == 11, 1, 0)),
    masstax6 = ifelse(dplyr::between(taxgroup, 1, 6), 1, ifelse(taxgroup == 11, 1, 0)),
    masstax3 = ifelse(dplyr::between(taxgroup, 1, 3), 1, ifelse(taxgroup == 11, 1, 0)),
    masstax7 = ifelse(dplyr::between(taxgroup, 1, 7), 1, ifelse(taxgroup == 11, 1, 0))
  )

ineqtaxext <-
  ineqtaxext %>% dplyr::mutate(
    masstax4 = ifelse(dplyr::between(taxgroup, 1, 4), 1, ifelse(taxgroup == 11, 1, 0)),
    masstax6 = ifelse(dplyr::between(taxgroup, 1, 6), 1, ifelse(taxgroup == 11, 1, 0)),
    masstax3 = ifelse(dplyr::between(taxgroup, 1, 3), 1, ifelse(taxgroup == 11, 1, 0)),
    masstax7 = ifelse(dplyr::between(taxgroup, 1, 7), 1, ifelse(taxgroup == 11, 1, 0))
  )



#baseline models
# m1b <-
#   glm(masstax4 ~ giniper + factor(country),
#       data = ineqtax,
#       family = binomial,
#   )
# summary(m1b)


m4b <-
  glm(masstax4 ~ giniper + subincome + I(subincome^2) + socialclass + polideo + supdemoc + satdemoc + govtrust +
        juditrust + congpartrust + educgroup + age + I(age^2) + female + factor(country),
      family = binomial,
      data = ineqtaxext
  )
summary(m4b)


m4c <-
  glm(masstax6 ~ giniper + subincome + I(subincome^2) + socialclass + polideo + supdemoc + satdemoc + govtrust +
        juditrust + congpartrust + educgroup + age + I(age^2) + female + factor(country),
      family = binomial,
      data = ineqtaxext
  )
summary(m4c)

m4d <-
  glm(masstax3 ~ giniper + subincome + I(subincome^2) + socialclass + polideo + supdemoc + satdemoc + govtrust +
        juditrust + congpartrust + educgroup + age + I(age^2) + female + factor(country),
      family = binomial,
      data = ineqtaxext
  )
summary(m4d)

m4e <-
  glm(masstax7 ~ giniper + subincome + I(subincome^2) + socialclass + polideo + supdemoc + satdemoc + govtrust +
        juditrust + congpartrust + educgroup + age + I(age^2) + female + factor(country),
      family = binomial,
      data = ineqtaxext
  )
summary(m4e)

####Table A9####
stargazer(m4b,m4c,m4d,m4e,
          title = 'Robustness checks - Model specification',
          keep = 'giniper',
          covariate.labels = "Subjective Gini",
          dep.var.labels = c('masstax4','masstax6','masstax3','masstax7'),
          dep.var.caption = 'Dependent variable',
          add.lines = list(c('Control variables','Yes','Yes','Yes','Yes','Yes')),
          omit.stat = c("f","ser","rsq"),
          star.cutoffs = c(0.05,0.01,0.001),
          font.size = 'footnotesize')


####Table A10####
# stargazer(m1,m4,m2,m5,m3,m6,
#           keep = 'giniper',
#           covariate.labels = "Subjective Gini",
#           title = 'Robustness checks – Subjective Gini (treatment of missing values)',
#           dep.var.labels = c('taxthreshold','masstax','classtax'),
#           add.lines = list(c('Control variables','No','Yes','No','Yes','No','Yes')),
#           omit.stat = c("f","ser","rsq"),
#           star.cutoffs = c(0.05,0.01,0.001),
#           model.names = FALSE,
#           font.size = 'scriptsize',
#           dep.var.caption = 'Dependent variable')

####Table A11####
# stargazer(m1,m4,m2,m5,m3,m6,
#           keep = 'giniper',
#           covariate.labels = "Subjective Gini",
#           title = 'Robustness checks – Subjective Gini (baseline assumptions)',
#           dep.var.labels = c('taxthreshold','masstax','classtax'),
#           add.lines = list(c('Control variables','No','Yes','No','Yes','No','Yes')),
#           omit.stat = c("f","ser","rsq"),
#           star.cutoffs = c(0.05,0.01,0.001),
#           model.names = FALSE,
#           font.size = 'scriptsize',
#           dep.var.caption = 'Dependent variable')

####Different dependent variable####
#creating a new variable named selftax - indicating the willingness of individuals to be included in the tax base
ineqtax_st <- ineqtax %>% dplyr::filter(subincome >= 1) %>% dplyr::mutate(selftax = ifelse(subincome >= taxgroup | taxgroup == 11, 1, 0))
ineqtaxext <- ineqtaxext %>% dplyr::mutate(selftax = ifelse(subincome >= taxgroup | taxgroup == 11, 1, 0))
table(ineqtaxext$selftax)
m7 <-
  glm(selftax ~ giniper + factor(country),
      family = binomial,
      data = ineqtax_st
  )
summary(m7)

m8 <-
  glm(selftax ~ giniper + subincome + I(subincome^2) + socialclass + polideo + supdemoc + satdemoc + govtrust +
        juditrust + congpartrust + educgroup + age + I(age^2) + female + factor(country),
      family = binomial,
      data = ineqtaxext
  )
summary(m8)

####Table A12####
stargazer(m7,m8,
          keep = 'giniper',
          covariate.labels = "Subjective Gini",
          title = 'Robustness checks – Different dependent variable',
          add.lines = list(c('Control variables','No','Yes')),
          omit.stat = c("f","ser","rsq"),
          star.cutoffs = c(0.05,0.01,0.001),
          dep.var.labels.include = FALSE,
          dep.var.caption = 'Dependent variable: selftax',
          model.names = FALSE)

###Decomposition of the subjective Gini by quintile####

m4.decomp1 <-
  lm(
    taxthreshold ~ q5per + subincome + I(subincome^2) + socialclass + polideo + supdemoc + satdemoc + govtrust +
      juditrust + congpartrust + educgroup + age + I(age^2) + female + factor(country),
    data = ineqtaxext
  )
summary(m4.decomp1)
m4.decomp2 <-
  lm(
    taxthreshold ~ q4per + subincome + I(subincome^2) + socialclass + polideo + supdemoc + satdemoc + govtrust +
      juditrust + congpartrust + educgroup + age + I(age^2) + female + factor(country),
    data = ineqtaxext
  )
summary(m4.decomp2)
m4.decomp3 <-
  lm(
    taxthreshold ~ q3per + subincome + I(subincome^2) + socialclass + polideo + supdemoc + satdemoc + govtrust +
      juditrust + congpartrust + educgroup + age + I(age^2) + female + factor(country),
    data = ineqtaxext
  )
summary(m4.decomp3)
m4.decomp4 <-
  lm(
    taxthreshold ~ q2per + subincome + I(subincome^2) + socialclass + polideo + supdemoc + satdemoc + govtrust +
      juditrust + congpartrust + educgroup + age + I(age^2) + female + factor(country),
    data = ineqtaxext
  )
summary(m4.decomp4)
m4.decomp5 <-
  lm(
    taxthreshold ~ q1per + subincome + I(subincome^2) + socialclass + polideo + supdemoc + satdemoc + govtrust +
      juditrust + congpartrust + educgroup + age + I(age^2) + female + factor(country),
    data = ineqtaxext
  )
summary(m4.decomp5)


####Table A13####
stargazer(
  m4.decomp1,m4.decomp2,m4.decomp3,m4.decomp4,m4.decomp5,
  title = 'Decomposition of the subjective Gini coefficient',
  model.names = FALSE,
  keep = c('q5per','q4per','q3per','q2per','q1per'),
  covariate.labels = c("Upper Quintile","4th Quintile","3rd Quintile","2nd Quintile","Bottom Quintile"),
  add.lines = list(c('Control variables','Yes','Yes','Yes','Yes','Yes')),
  omit.stat = c("f","ser","rsq"),
  star.cutoffs = c(0.05,0.01,0.001),
  dep.var.labels.include = FALSE,
  dep.var.caption = 'Dependent variable: taxthreshold'
)

####Normative inequality measures ####
tax <-
  tax %>% dplyr::filter(
    !(
      subincome < 1 |
        polideo == 97 |
        polideo < 0 |
        supdemoc < 1 |
        govtrust < 1 | satdemoc < 1 | socialclass < 1 | juditrust < 1 |
        congpartrust < 1 | educgroup < 1 | inqtolerance < 1 | incomeinqworst < 0 | richpoorworst < 0
    )
  )

table(tax$incomeinqworst)
table(tax$richpoorworst)

normi1 <-
  lm(taxthreshold ~ inqtolerance + subincome + I(subincome^2) +  socialclass + polideo + supdemoc + satdemoc + govtrust +
       juditrust + congpartrust + educgroup + age + I(age^2) + female + factor(country),
     data = tax
  )
summary(normi1)

normi2 <-
  glm(masstax ~ inqtolerance + subincome + I(subincome^2) +  socialclass + polideo + supdemoc + satdemoc + govtrust +
        juditrust + congpartrust + educgroup + age + I(age^2) + female + factor(country),
      family = binomial,
      data = tax
  )
summary(normi2)

normi3 <-
  lm(taxthreshold ~ incomeinqworst + subincome + I(subincome^2) + socialclass + polideo + supdemoc + satdemoc + govtrust +
       juditrust + congpartrust + educgroup + age + I(age^2) + female + factor(country),
     data = tax
  )
summary(normi3)

normi4 <-
  glm(masstax ~ incomeinqworst + subincome + I(subincome^2) + socialclass + polideo + supdemoc + satdemoc + govtrust +
        juditrust + congpartrust + educgroup + age + I(age^2) + female + factor(country),
      family = binomial,
      data = tax
  )
summary(normi4)

normi5 <-
  lm(taxthreshold ~ richpoorworst + subincome + I(subincome^2) + socialclass + polideo + supdemoc + satdemoc + govtrust +
       juditrust + congpartrust + educgroup + age + I(age^2) + female + factor(country),
     data = tax
  )
summary(normi5)

normi6 <-
  glm(masstax ~ richpoorworst + subincome + I(subincome^2) + socialclass + polideo + supdemoc + satdemoc + govtrust +
        juditrust + congpartrust + educgroup + age + I(age^2) + female + factor(country),
      family = binomial,
      data = tax
  )
summary(normi6)



####Table A14####
stargazer(
  normi1,normi2,normi3,normi4,normi5,normi6,
  title = 'Normative inequality and support for mass taxation',
  model.names = FALSE,
  keep = c('inqtolerance','incomeinqworst','richpoorworst'),
  covariate.labels = c("Tolerance for Inequality","Worst Inequality: \nIncome","Worst Inequality: \nBetween Rich and Poor"),
  add.lines = list(c('Control variables','Yes','Yes','Yes','Yes','Yes','Yes')),
  omit.stat = c("f","ser","rsq"),
  star.cutoffs = c(0.05,0.01,0.001),
  font.size = 'scriptsize',
  dep.var.caption = 'Dependent variable'
)


#Mediation analysis####

ineqtaxext_med <- ineqtaxext %>% dplyr::filter(unfairness.dist >= 1)
#create a new variable to enable the mediate function to work with a factor variable
ineqtaxext_med <- ineqtaxext_med %>% dplyr::mutate(country2 = factor(country))
table(ineqtaxext_med$unfairness.dist)

med1a <-
  lm(
    unfairness.dist ~ giniper + country2,
    data = ineqtaxext_med
  )
summary(med1a)

med1b <-
  lm(
    taxthreshold ~ giniper + unfairness.dist + country2,
    data = ineqtaxext_med
  )
summary(med1b)

med1.results <- mediate(med1a,med1b,treat = 'giniper',mediator = 'unfairness.dist',boot = TRUE)
summary(med1.results)
plot(med1.results)

med2a <-
  lm(
    unfairness.dist ~ giniper + subincome + I(subincome^2) + socialclass + polideo + supdemoc + satdemoc + govtrust +
      juditrust + congpartrust + educgroup + age + I(age^2) + female + country2,
    data = ineqtaxext_med
  )
summary(med2a)

med2b <-
  lm(
    taxthreshold ~ giniper + unfairness.dist + subincome + I(subincome^2) + socialclass + polideo + supdemoc + satdemoc + govtrust +
      juditrust + congpartrust + educgroup + age + I(age^2) + female + country2,
    data = ineqtaxext_med
  )
summary(med2b)

med2.results <- mediate(med2a,med2b,treat = 'giniper',mediator = 'unfairness.dist',boot = TRUE)
summary(med2.results)
plot(med2.results)

#Reciprocity
ineqtaxext_med2 <- ineqtaxext %>% dplyr::filter(localauthority >= 1 & trustpubhosp >= 1 & complaint >= 1) %>%
  dplyr::mutate(reciprocity_ind =  ifelse(localauthority == 4,1,ifelse(localauthority == 3,2,ifelse(localauthority == 2,3,4))) * 1/3 +
                  ifelse(trustpubhosp == 4,1,ifelse(trustpubhosp == 3,2,ifelse(trustpubhosp == 2,3,4))) * 1/3 +
                  ifelse(complaint == 4,1,ifelse(complaint == 3,2,ifelse(complaint == 2,3,4))) * 1/3)
#create a new variable to enable the mediate function to work with a factor variable
ineqtaxext_med2 <- ineqtaxext_med2 %>% dplyr::mutate(country2 = factor(country))
table(ineqtaxext_med2$reciprocity_ind)
#scale the reciprocity index between 0 and 1
#define function to rescale variables to range between 0 and 1
scale_values <- function(x){(x - min(x))/(max(x)-min(x))}
ineqtaxext_med2$reciprocity <- scale_values(ineqtaxext_med2$reciprocity_ind)
table(ineqtaxext_med2$reciprocity)
summary(ineqtaxext_med2$reciprocity)
#verify that index values set up correctly
ineqtaxext_med2 %>% dplyr::select(localauthority,trustpubhosp,complaint,reciprocity_ind,reciprocity)


med3a <-
  lm(
    reciprocity ~ giniper + country2,
    data = ineqtaxext_med2
  )
summary(med3a)

med3b <-
  lm(
    taxthreshold ~ giniper + reciprocity + country2,
    data = ineqtaxext_med2
  )
summary(med3b)

med3.results <- mediate(med3a,med3b,treat = 'giniper',mediator = 'reciprocity',boot = TRUE)
summary(med3.results)
plot(med3.results)

med4a <-
  lm(
    reciprocity ~ giniper + subincome + I(subincome^2) + socialclass + polideo + supdemoc + satdemoc + govtrust +
      juditrust + congpartrust + educgroup + age + I(age^2) + female + country2,
    data = ineqtaxext_med2
  )
summary(med4a)

med4b <-
  lm(
    taxthreshold ~ giniper + reciprocity + subincome + I(subincome^2) + socialclass + polideo + supdemoc + satdemoc + govtrust +
      juditrust + congpartrust + educgroup + age + I(age^2) + female + country2,
    data = ineqtaxext_med2
  )
summary(med4b)

med4.results <- mediate(med4a,med4b,treat = 'giniper',mediator = 'reciprocity',boot = TRUE)
summary(med4.results)
plot(med4.results)

#Meritocracy
table(ineqtaxext$meritocracy)
ineqtaxext_med3 <- ineqtaxext %>% dplyr::filter(meritocracy >= 0)
#create a new variable to enable the mediate function to work with a factor variable
ineqtaxext_med3 <- ineqtaxext_med3 %>% dplyr::mutate(country2 = factor(country))

med5a <-
  glm(
    meritocracy ~ giniper + country2,
    family = binomial,
    data = ineqtaxext_med3
  )
summary(med5a)

med5b <-
  lm(
    taxthreshold ~ giniper + meritocracy + country2,
    data = ineqtaxext_med3
  )
summary(med5b)

med5.results <- mediate(med5a,med5b,treat = 'giniper',mediator = 'meritocracy',boot = TRUE)
summary(med5.results)
plot(med5.results)

med6a <-
  glm(
    meritocracy ~ giniper + subincome + I(subincome^2) + socialclass + polideo + supdemoc + satdemoc + govtrust +
      juditrust + congpartrust + educgroup + age + I(age^2) + female + country2,
    family = binomial,
    data = ineqtaxext_med3
  )
summary(med6a)

med6b <-
  lm(
    taxthreshold ~ giniper + meritocracy + subincome + I(subincome^2) + socialclass + polideo + supdemoc + satdemoc + govtrust +
      juditrust + congpartrust + educgroup + age + I(age^2) + female + country2,
    data = ineqtaxext_med3
  )
summary(med6b)

med6.results <- mediate(med6a,med6b,treat = 'giniper',mediator = 'meritocracy',boot = TRUE)
summary(med6.results)
plot(med6.results)

####Table A15####
stargazer(med1a,med1b,med2a,med2b,
          title = 'Regression models -- unfairness',
          keep = c('giniper','unfairness.dist'),
          covariate.labels = c("Subjective Gini","Unfairness"),
          model.names = FALSE,
          dep.var.labels = c("unfairness","taxthreshold","unfairness","taxthreshold"),
          dep.var.caption = 'Dependent variable',
          add.lines = list(c('Control variables','No','No','Yes','Yes')),
          omit.stat = c("f","ser","rsq"),
          star.cutoffs = c(0.05,0.01,0.001),
          font.size = 'footnotesize')

####Table A16####
stargazer(med3a,med3b,med4a,med4b,
          title = 'Regression models -- reciprocity',
          keep = c('giniper','reciprocity'),
          covariate.labels = c("Subjective Gini","Reciprocity"),
          dep.var.labels = c("reciprocity","taxthreshold","reciprocity","taxthreshold"),
          model.names = FALSE,
          dep.var.caption = 'Dependent variable',
          add.lines = list(c('Control variables','No','No','Yes','Yes')),
          omit.stat = c("f","ser","rsq"),
          star.cutoffs = c(0.05,0.01,0.001),
          font.size = 'footnotesize')

####Table A17####
stargazer(med5a,med5b,med6a,med6b,
          title = 'Regression models -- meritocracy',
          keep = c('giniper','meritocracy'),
          covariate.labels = c("Subjective Gini","Meritocracy"),
          dep.var.labels = c("meritocracy","taxthreshold","meritocracy","taxthreshold"),
          model.names = FALSE,
          dep.var.caption = 'Dependent variable',
          add.lines = list(c('Control variables','No','No','Yes','Yes')),
          omit.stat = c("f","ser","rsq"),
          star.cutoffs = c(0.05,0.01,0.001),
          font.size = 'footnotesize')

####Table 2####
r1 <-  data.frame('Control variables','Yes','Yes','Yes')
r1[2, ] <- c('Observations','7340','7316','7371')
library(modelsummary)
modelsummary(title = 'Results of mediation analysis',list("(1) M: Unfairness" = med2.results,
                                                          "(2) M: Reciprocity" = med4.results,
                                                          "(3) M: Meritocracy" = med6.results),
             statistic = "conf.int",fmt = 3,stars = c('*' = .05,'**' = .01,'***' = .001),
             output = 'latex',add_rows = r1)

#sensitivity checks for unfairness 
meds <- medsens(med2.results,rho.by = 0.1,effect.type = 'indirect')
summary(meds)
####Figure A5####
# pdf(file = 'Figures/PDF/figurea5.pdf',width = 10)
plot(meds,sens.par = 'rho',main = 'Unfairness',ylim = c(-0.5,0.5))
# dev.off()


####Colombia case study####


ineqtax_col <- ineqtax %>% dplyr::filter(country == 'Colombia')
ineqtaxext_col <- ineqtaxext %>% dplyr::filter(country == 'Colombia')

m1.col <-
  lm(taxthreshold ~ giniper,
     data = ineqtax_col)
summary(m1.col)


m4.col <-
  lm(
    taxthreshold ~ giniper + subincome + I(subincome^2) + socialclass + polideo + supdemoc + satdemoc + govtrust + 
      juditrust + congpartrust + educgroup + age + I(age^2) + female,
    data = ineqtaxext_col
  )
summary(m4.col)

m2.col <-
  glm(masstax ~ giniper,
      family = binomial,
      data = ineqtax_col)
summary(m2.col)

m5.col <-
  glm(masstax ~ giniper + subincome + I(subincome^2) + socialclass + polideo + supdemoc + satdemoc + govtrust + 
        juditrust + congpartrust + educgroup + age + I(age^2) + female,
      family = binomial,
      data = ineqtaxext_col
  )
summary(m5.col)

m3.col <-
  glm(classtax ~ giniper,
      family = binomial,
      data = ineqtax_col)
summary(m3.col)

m6.col <-
  glm(classtax ~ giniper + subincome + I(subincome^2) + socialclass + polideo + supdemoc + satdemoc + govtrust + 
        juditrust + congpartrust + educgroup + age + I(age^2) + female,
      family = binomial,
      data = ineqtaxext_col
  )
summary(m6.col)

#by social class
ineqtaxext_col <- ineqtaxext_col %>% dplyr::mutate(
  socialclassfactor = factor(ifelse(socialclass <= 2,'Upper class',
                                    ifelse(socialclass >= 4,'Lower class',
                                           'Middle class')),levels = c('Upper class', 
                                                                       'Middle class',
                                                                       'Lower class')))

table(ineqtaxext_col$socialclassfactor)
round(prop.table(table(ineqtaxext_col$socialclassfactor)),3)
ineqtaxext_col$socialclassfactor <- relevel(ineqtaxext_col$socialclassfactor, ref = 'Middle class')
m4.col.int <-
  lm(
    taxthreshold ~ giniper*socialclassfactor + subincome + I(subincome^2) + polideo + supdemoc + satdemoc + govtrust + 
      juditrust + congpartrust + educgroup + age + I(age^2) + female,
    data = ineqtaxext_col
  )
summary(m4.col.int)

####Table A18####
stargazer(m1.col,m4.col,m2.col,m5.col,m3.col,m6.col,m4.col.int,
          title = 'Preferences over the size of the tax base in Colombia',
          dep.var.labels = c('taxthreshold','masstax','classtax','taxthreshold'),
          keep = c('giniper',"socialclassfactor"),
          covariate.labels = c("Subjective Gini","Upper Class",
                               "Lower Class","Subjective Gini X Upper Class",
                               "Subjective Gini X Lower Class"),
          add.lines = list(c('Control variables','No','Yes','No','Yes','No','Yes','Yes')),
          model.names = FALSE,
          dep.var.caption = 'Dependent variable',
          omit.stat = c("f","ser","rsq"),
          star.cutoffs = c(0.05,0.01,0.001),
          font.size = 'scriptsize')
round((mean(ineqtaxext_col$taxthreshold) + (sd(ineqtaxext_col$giniper)*4.4866281)) / mean(ineqtaxext_col$taxthreshold),3)
round(sd(ineqtaxext_col$giniper)*4.4866281,2)
round(sd(ineqtaxext$giniper)*0.8508480,2)

col.int.viz <- interact_plot(m4.col.int, pred = 'giniper',modx = 'socialclassfactor',
                             interval = TRUE,x.label = 'Subjective Gini coefficient',
                             y.label = 'Predicted values of taxthreshold',
                             modx.values = c('Upper class','Middle class','Lower class'),
                             legend.main = '')
####Figure 5####
# pdf(file = 'Figures/PDF/figure5.pdf',width = 10)
col.int.viz + theme(legend.text = element_text(size = 14))
# dev.off()

#####Desiguales 2016####
#Loading Encuesta Desigualdades Ecónomicas y Sociales (Desiguales) 2016 survey data 
des2016 <- read_csv('DES_2016_csv-2.csv')
mean(des2016$p36_4)
table(des2016$p9_2)
table(des2016$p86)
table(des2016$macrozona)
#renaming variables
des.dist <- des2016 %>% rename(perineq.dist = p9_2,perineq.opp = p9_5,perineq = p8,
                               oppmoretax = p36_4, subincome = p67, socialclass = p14,
                               satdemoc = p34,polideo = p86, educgroup = p51,
                               sex = sexo,age = edad, supdemoc = p33
)

des.dist <- des.dist %>% dplyr::filter(!(perineq.dist > 10 | oppmoretax > 5| subincome > 6 
                                         | satdemoc > 5 | socialclass > 5 | polideo > 7 
                                         | educgroup > 9 | supdemoc > 3)) 
mean_polideo <- mean(des.dist$polideo[des.dist$polideo <= 5])
des.dist <- des.dist %>% dplyr::mutate(female = as.numeric(sex == 2),
                                       supdemoc = as.numeric(supdemoc == 1))
des.dist <- des.dist %>% dplyr::mutate(polideo = ifelse(polideo %in% c(6,7),mean_polideo,polideo))

d1 <- lm(oppmoretax ~ perineq.dist,data = des.dist)
summary(d1)

d2 <- lm(oppmoretax ~ perineq.dist + subincome + I(subincome^2) + socialclass + polideo +  
           supdemoc + satdemoc + educgroup + age + I(age^2) + female,
         data = des.dist)
summary(d2)

d3 <- lm(oppmoretax ~ perineq.dist + subincome +  I(subincome^2) + socialclass + polideo +  
           supdemoc + satdemoc + educgroup + age + I(age^2) + female + factor(macrozona),
         data = des.dist)
summary(d3)

des.opp <- des2016 %>% rename(perineq.dist = p9_2,perineq.opp = p9_5,perineq = p8,
                              oppmoretax = p36_4, subincome = p67, socialclass = p14,
                              satdemoc = p34,polideo = p86, educgroup = p51,
                              sex = sexo,age = edad, supdemoc = p33
)

des.opp <- des.opp %>% dplyr::filter(!(perineq.opp > 10 | oppmoretax > 5| subincome > 6 
                                       | satdemoc > 5 | socialclass > 5 | polideo > 7 
                                       | educgroup > 9 | supdemoc > 3)) 
mean_polideo2 <- mean(des.opp$polideo[des.opp$polideo <= 5])
des.opp <- des.opp %>% dplyr::mutate(female = as.numeric(sex == 2),
                                     supdemoc = as.numeric(supdemoc == 1))
des.opp <- des.opp %>% dplyr::mutate(polideo = ifelse(polideo %in% c(6,7),mean_polideo2,polideo))


d4 <- lm(oppmoretax ~ perineq.opp + subincome + I(subincome^2) + socialclass + polideo +  
           supdemoc + satdemoc + educgroup + age + I(age^2) + female,
         data = des.opp)
summary(d4)


des.pi <- des2016 %>% rename(perineq.dist = p9_2,perineq.opp = p9_5,perineq = p8,
                             oppmoretax = p36_4, subincome = p67, socialclass = p14,
                             satdemoc = p34,polideo = p86, educgroup = p51,
                             sex = sexo,age = edad, supdemoc = p33
)

des.pi <- des.pi %>% dplyr::filter(!(perineq > 10 | oppmoretax > 5| subincome > 6 
                                     | satdemoc > 5 | socialclass > 5 | polideo > 7 
                                     | educgroup > 9 | supdemoc > 3)) 
mean_polideo3 <- mean(des.pi$polideo[des.pi$polideo <= 5])
des.pi <- des.pi %>% dplyr::mutate(female = as.numeric(sex == 2),
                                   supdemoc = as.numeric(supdemoc == 1))
des.pi <- des.pi %>% dplyr::mutate(polideo = ifelse(polideo %in% c(6,7),mean_polideo3,polideo))

d5 <- lm(oppmoretax ~ perineq + subincome + I(subincome^2) + socialclass + polideo +  
           supdemoc + satdemoc + educgroup + age + I(age^2) + female,data = des.pi)
summary(d5)

#Table 3
stargazer(d1,d2,d3,d4,d5,
          title = 'Determinants of unwillingness to pay more taxes in Chile',
          keep = c('perineq.dist','perineq.opp','perineq'),
          covariate.labels = c("Perceived Inequality (Income)", "Perceived Inequality (Opportunity)", "Perceived Inequality"),
          model.names = FALSE,
          dep.var.caption = 'Dependent variable: Unwillingness to pay more taxes',
          omit.stat = c("f","ser","rsq"),
          add.lines = list(c('Control variables','No','Yes','Yes','Yes','Yes')),
          font.size = 'footnotesize',
          dep.var.labels.include = FALSE,
          star.cutoffs = c(0.05,0.01,0.001))

####Table A19####
stargazer(d1,d2,d3,d4,d5,
          title = 'Determinants of unwillingness to pay more taxes in Chile - full results',
          keep = c('perineq.dist','perineq.opp','perineq','subincome','I(subincome^2)','socialclass','polideo','supdemoc','satdemoc','educgroup','age','I(age^2)','female','Constant'),
          covariate.labels = c("Perceived Inequality (Income)", "Perceived Inequality (Opportunity)", "Perceived Inequality","Subjective Household Income","Subjective Household Income^2","Subjective Social Class","Political Ideology ","Support for Democracy","Satisfaction with Democracy","Education Group","Age","Age^2","Female"),
          model.names = FALSE,
          dep.var.caption = 'Dependent variable: Unwillingness to pay more taxes',
          omit.stat = c("f","ser","rsq"),
          font.size = 'scriptsize',
          dep.var.labels.include = FALSE,
          star.cutoffs = c(0.05,0.01,0.001))



des2016.2 <- des2016 %>% rename(perineq.dist = p9_2,perineq.opp = p9_5,perineq = p8,
                                oppmoretax = p36_4, subincome = p67, socialclass = p14,
                                satdemoc = p34,polideo = p86, educgroup = p51,
                                sex = sexo,age = edad, supdemoc = p33
)
table(des2016$region)

des2016.2 <- des2016.2 %>% dplyr::filter(!(perineq.dist > 10 | oppmoretax > 5))
des2016.2 <-  des2016.2 %>% dplyr::mutate(perineq.opp = ifelse(perineq.opp > 10,NA,perineq.opp),
                                          perineq = ifelse(perineq > 10,NA,perineq),
                                          subincome = ifelse(subincome > 6,NA,subincome),
                                          satdemoc = ifelse(satdemoc > 5,NA,satdemoc),
                                          socialclass = ifelse(socialclass > 5,NA,socialclass),
                                          polideo = ifelse(polideo > 5,NA,polideo),
                                          educgroup = ifelse(educgroup > 9,NA,educgroup),
                                          supdemoc = ifelse(supdemoc > 3,NA,supdemoc))
des2016.2 <- des2016.2 %>% 
  dplyr::select(zona,macrozona,region,perineq.dist,perineq.opp,perineq,oppmoretax,subincome,socialclass,satdemoc,polideo,educgroup,sex,age,supdemoc)
des2016.2 <- des2016.2 %>% dplyr::mutate(female = as.numeric(sex == 2),
                                         supdemoc = as.numeric(supdemoc == 1)) %>% dplyr::select(-sex)
####Multiple imputation####
des2.imp <- mice(des2016.2, m = 5,method = 'cart')

plot(des2.imp)
summary(des2.imp)
d2.imp <- with(des2.imp,
               lm(oppmoretax ~ perineq.dist + subincome + I(subincome^2) + socialclass + polideo +  
                    supdemoc + satdemoc + educgroup + age + I(age^2) + female))
summary(d2.imp)
d2.imp.p <- pool(d2.imp)
summary(d2.imp.p)

d3.imp <- with(des2.imp,
               lm(oppmoretax ~ perineq.dist + subincome + I(subincome^2) + socialclass + polideo +  
                    supdemoc + satdemoc + educgroup + age + I(age^2) + female + factor(macrozona)))
summary(d3.imp)
d3.imp.p <- pool(d3.imp)
summary(d3.imp.p)

row.obs2 <-  data.frame('Observations','2481','2481') 

####Table A20####
library(modelsummary)
modelsummary(models = list(d2.imp.p,d3.imp.p),fmt = 3,stars = c('*' = .05,'**' = .01,'***' = .001),
             gof_map =  "adj.r.squared",
             coef_omit = c(1,13:15),
             coef_rename = c("Perceived Inequality (Income)","Subjective Household Income","Subjective Household Income^2","Subjective Social Class","Political Ideology ","Support for Democracy","Satisfaction with Democracy","Education Group","Age","Age^2","Female"),
             title = 'Determinants of unwillingness to pay more taxes in Chile (with multiple imputation)',
             output = 'latex',add_rows = row.obs2)


####Baseline analysis - Chile####


ineqtax_chi <- ineqtax %>% dplyr::filter(country == 'Chile')
ineqtaxext_chi <- ineqtaxext %>% dplyr::filter(country == 'Chile')

m1.chi <-
  lm(taxthreshold ~ giniper,
     data = ineqtax_chi)
summary(m1.chi)


m4.chi <-
  lm(
    taxthreshold ~ giniper + subincome + I(subincome^2) + socialclass + polideo + supdemoc + satdemoc + govtrust + 
      juditrust + congpartrust + educgroup + age + I(age^2) + female,
    data = ineqtaxext_chi
  )
summary(m4.chi)

m2.chi <-
  glm(masstax ~ giniper,
      family = binomial,
      data = ineqtax_chi)
summary(m2.chi)

m5.chi <-
  glm(masstax ~ giniper + subincome + I(subincome^2) + socialclass + polideo + supdemoc + satdemoc + govtrust + 
        juditrust + congpartrust + educgroup + age + I(age^2) + female,
      family = binomial,
      data = ineqtaxext_chi
  )
summary(m5.chi)

m3.chi <-
  glm(classtax ~ giniper,
      family = binomial,
      data = ineqtax_chi)
summary(m3.chi)

m6.chi <-
  glm(classtax ~ giniper + subincome + I(subincome^2) + socialclass + polideo + supdemoc + satdemoc + govtrust + 
        juditrust + congpartrust + educgroup + age + I(age^2) + female,
      family = binomial,
      data = ineqtaxext_chi
  )
summary(m6.chi)

####Tables A21,A22####
#Table A22 has the specification of Table A11
#"(modifying baseline assumptions)" should be added to Table A22 in the title argument below
stargazer(m1.chi,m4.chi,m2.chi,m5.chi,m3.chi,m6.chi,
          title = 'Preferences over the size of the tax base in Chile',
          dep.var.labels = c('taxthreshold','masstax','classtax'),
          keep = 'giniper',
          covariate.labels = "Subjective Gini",
          add.lines = list(c('Control variables','No','Yes','No','Yes','No','Yes')),
          model.names = FALSE,
          dep.var.caption = 'Dependent variable',
          omit.stat = c("f","ser","rsq"),
          star.cutoffs = c(0.05,0.01,0.001),
          font.size = 'footnotesize')

